This project uses Canadian General Election data to create visualizations. The original dataset can be found here: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/ABFNSQ

Install packages

install.packages("tidyverse")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
install.packages("ggplot2")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.2'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ggplot2)

Read the CSV file

# Load the federal candidates file from the working directory into a tibble.
Election_data <- read_csv(
  file = "federal-candidates-2021-10-20.csv"
)
## Rows: 46526 Columns: 31
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (22): type_elxn, elected, candidate_name, incumbent, gender, country_bi...
## dbl   (8): id, parliament, year, birth_year, riding_id, votes, percent_votes...
## date  (1): edate
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Look at structure

# Inspect the column types and the first few values of every column.
str(Election_data)
## spc_tbl_ [46,526 × 31] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ id                : num [1:46526] 26093 13011 27974 18040 1798 ...
##  $ parliament        : num [1:46526] 1 1 1 1 1 1 1 1 1 1 ...
##  $ year              : num [1:46526] 1867 1867 1867 1867 1867 ...
##  $ type_elxn         : chr [1:46526] "General" "General" "General" "General" ...
##  $ elected           : chr [1:46526] "Elected" "Elected" "Not elected" "Elected" ...
##  $ candidate_name    : chr [1:46526] "POWER," "JONES," "SHANNON, S.L." "KIRKPATRICK, Thomas" ...
##  $ edate             : Date[1:46526], format: "1867-08-07" "1867-08-07" ...
##  $ incumbent         : chr [1:46526] "Not incumbent" "Not incumbent" "Not incumbent" "Not incumbent" ...
##  $ gender            : chr [1:46526] "M" "M" "M" "M" ...
##  $ birth_year        : num [1:46526] 1815 1824 NA 1805 1829 ...
##  $ country_birth     : chr [1:46526] NA NA NA NA ...
##  $ lgbtq2_out        : chr [1:46526] NA NA NA NA ...
##  $ indigenousorigins : chr [1:46526] "Other" "Other" "Other" "Other" ...
##  $ occupation        : chr [1:46526] "merchant" "merchant" "lawyer" "lawyer" ...
##  $ lawyer            : chr [1:46526] "Other" "Other" "Lawyer" "Lawyer" ...
##  $ censuscategory    : chr [1:46526] "Sales and service occupations" "Sales and service occupations" "Occupations in education, law and social, community and government services" "Occupations in education, law and social, community and government services" ...
##  $ riding_id         : num [1:46526] NA NA NA NA NA NA NA NA NA NA ...
##  $ riding            : chr [1:46526] "HALIFAX" "HALIFAX" "HALIFAX" "FRONTENAC" ...
##  $ province          : chr [1:46526] "Nova Scotia" "Nova Scotia" "Nova Scotia" "Ontario" ...
##  $ votes             : num [1:46526] 2367 2381 2154 1242 NA ...
##  $ percent_votes     : num [1:46526] 26.1 26.3 23.8 64.2 100 ...
##  $ acclaimed         : chr [1:46526] "Not acclaimed" "Not acclaimed" "Not acclaimed" "Not acclaimed" ...
##  $ switcher          : chr [1:46526] "Switcher" "Switcher" "Not switcher" "Not switcher" ...
##  $ multiple_candidacy: chr [1:46526] "Single" "Single" "Single" "Single" ...
##  $ party_raw         : chr [1:46526] "Anti-Confederate" "Labour" "Unknown" "Conservative" ...
##  $ party_minor_group : chr [1:46526] "Third_Party" "Labour" "Independent" "Conservative" ...
##  $ party_major_group : chr [1:46526] "Third_Party" "Third_Party" "Independent" "Conservative" ...
##  $ gov_party_raw     : chr [1:46526] "Conservative" "Conservative" "Conservative" "Conservative" ...
##  $ gov_minor_group   : chr [1:46526] "Conservative" "Conservative" "Conservative" "Conservative" ...
##  $ gov_major_group   : chr [1:46526] "Conservative" "Conservative" "Conservative" "Conservative" ...
##  $ num_candidates    : num [1:46526] 4 4 4 2 1 2 2 1 2 2 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   id = col_double(),
##   ..   parliament = col_double(),
##   ..   year = col_double(),
##   ..   type_elxn = col_character(),
##   ..   elected = col_character(),
##   ..   candidate_name = col_character(),
##   ..   edate = col_date(format = ""),
##   ..   incumbent = col_character(),
##   ..   gender = col_character(),
##   ..   birth_year = col_double(),
##   ..   country_birth = col_character(),
##   ..   lgbtq2_out = col_character(),
##   ..   indigenousorigins = col_character(),
##   ..   occupation = col_character(),
##   ..   lawyer = col_character(),
##   ..   censuscategory = col_character(),
##   ..   riding_id = col_double(),
##   ..   riding = col_character(),
##   ..   province = col_character(),
##   ..   votes = col_double(),
##   ..   percent_votes = col_double(),
##   ..   acclaimed = col_character(),
##   ..   switcher = col_character(),
##   ..   multiple_candidacy = col_character(),
##   ..   party_raw = col_character(),
##   ..   party_minor_group = col_character(),
##   ..   party_major_group = col_character(),
##   ..   gov_party_raw = col_character(),
##   ..   gov_minor_group = col_character(),
##   ..   gov_major_group = col_character(),
##   ..   num_candidates = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Per-column summary: ranges and NA counts for numeric/date columns,
# length and class for character columns.
summary(Election_data)
##        id          parliament         year       type_elxn        
##  Min.   :    1   Min.   : 1.00   Min.   :1867   Length:46526      
##  1st Qu.: 6478   1st Qu.:21.00   1st Qu.:1949   Class :character  
##  Median :22031   Median :32.00   Median :1980   Mode  :character  
##  Mean   :18474   Mean   :28.64   Mean   :1970                     
##  3rd Qu.:28613   3rd Qu.:38.00   3rd Qu.:2004                     
##  Max.   :36769   Max.   :44.00   Max.   :2021                     
##                                                                   
##    elected          candidate_name         edate             incumbent        
##  Length:46526       Length:46526       Min.   :1867-08-07   Length:46526      
##  Class :character   Class :character   1st Qu.:1949-06-27   Class :character  
##  Mode  :character   Mode  :character   Median :1980-02-18   Mode  :character  
##                                        Mean   :1970-11-14                     
##                                        3rd Qu.:2004-06-28                     
##                                        Max.   :2021-09-20                     
##                                                                               
##     gender            birth_year    country_birth       lgbtq2_out       
##  Length:46526       Min.   :1798    Length:46526       Length:46526      
##  Class :character   1st Qu.:1864    Class :character   Class :character  
##  Mode  :character   Median :1908    Mode  :character   Mode  :character  
##                     Mean   :1902                                         
##                     3rd Qu.:1942                                         
##                     Max.   :1998                                         
##                     NA's   :34250                                        
##  indigenousorigins   occupation           lawyer          censuscategory    
##  Length:46526       Length:46526       Length:46526       Length:46526      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    riding_id        riding            province             votes      
##  Min.   : 6001   Length:46526       Length:46526       Min.   :    0  
##  1st Qu.:24048   Class :character   Class :character   1st Qu.: 1094  
##  Median :35052   Mode  :character   Mode  :character   Median : 4058  
##  Mean   :35464                                         Mean   : 6975  
##  3rd Qu.:47006                                         3rd Qu.:10371  
##  Max.   :62001                                         Max.   :71535  
##  NA's   :35487                                         NA's   :691    
##  percent_votes      acclaimed           switcher         multiple_candidacy
##  Min.   :  0.000   Length:46526       Length:46526       Length:46526      
##  1st Qu.:  4.268   Class :character   Class :character   Class :character  
##  Median : 23.124   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 26.351                                                           
##  3rd Qu.: 44.760                                                           
##  Max.   :100.000                                                           
##  NA's   :73                                                                
##   party_raw         party_minor_group  party_major_group  gov_party_raw     
##  Length:46526       Length:46526       Length:46526       Length:46526      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  gov_minor_group    gov_major_group    num_candidates  
##  Length:46526       Length:46526       Min.   : 1.000  
##  Class :character   Class :character   1st Qu.: 3.000  
##  Mode  :character   Mode  :character   Median : 5.000  
##                                        Mean   : 4.771  
##                                        3rd Qu.: 6.000  
##                                        Max.   :21.000  
## 

Check for duplicates

# Flag rows that are exact copies of an earlier row, then count them.
duplicate_test <- duplicated(Election_data)
sum(duplicate_test)
## [1] 0

Remove columns with irrelevant information

# Drop the record identifier and the birth year by name rather than by
# column position (1 and 10), so this step keeps removing the intended
# columns even if the column order of the input file changes.
Election_data1 <- Election_data %>%
  select(-id, -birth_year)
summary(Election_data1)
##    parliament         year       type_elxn           elected         
##  Min.   : 1.00   Min.   :1867   Length:46526       Length:46526      
##  1st Qu.:21.00   1st Qu.:1949   Class :character   Class :character  
##  Median :32.00   Median :1980   Mode  :character   Mode  :character  
##  Mean   :28.64   Mean   :1970                                        
##  3rd Qu.:38.00   3rd Qu.:2004                                        
##  Max.   :44.00   Max.   :2021                                        
##                                                                      
##  candidate_name         edate             incumbent            gender         
##  Length:46526       Min.   :1867-08-07   Length:46526       Length:46526      
##  Class :character   1st Qu.:1949-06-27   Class :character   Class :character  
##  Mode  :character   Median :1980-02-18   Mode  :character   Mode  :character  
##                     Mean   :1970-11-14                                        
##                     3rd Qu.:2004-06-28                                        
##                     Max.   :2021-09-20                                        
##                                                                               
##  country_birth       lgbtq2_out        indigenousorigins   occupation       
##  Length:46526       Length:46526       Length:46526       Length:46526      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##     lawyer          censuscategory       riding_id        riding         
##  Length:46526       Length:46526       Min.   : 6001   Length:46526      
##  Class :character   Class :character   1st Qu.:24048   Class :character  
##  Mode  :character   Mode  :character   Median :35052   Mode  :character  
##                                        Mean   :35464                     
##                                        3rd Qu.:47006                     
##                                        Max.   :62001                     
##                                        NA's   :35487                     
##    province             votes       percent_votes      acclaimed        
##  Length:46526       Min.   :    0   Min.   :  0.000   Length:46526      
##  Class :character   1st Qu.: 1094   1st Qu.:  4.268   Class :character  
##  Mode  :character   Median : 4058   Median : 23.124   Mode  :character  
##                     Mean   : 6975   Mean   : 26.351                     
##                     3rd Qu.:10371   3rd Qu.: 44.760                     
##                     Max.   :71535   Max.   :100.000                     
##                     NA's   :691     NA's   :73                          
##    switcher         multiple_candidacy  party_raw         party_minor_group 
##  Length:46526       Length:46526       Length:46526       Length:46526      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  party_major_group  gov_party_raw      gov_minor_group    gov_major_group   
##  Length:46526       Length:46526       Length:46526       Length:46526      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  num_candidates  
##  Min.   : 1.000  
##  1st Qu.: 3.000  
##  Median : 5.000  
##  Mean   : 4.771  
##  3rd Qu.: 6.000  
##  Max.   :21.000  
## 

Change Data types for categories

# Add factor versions of the categorical columns as new `*_new` columns;
# the original character columns are kept alongside them.
# NOTE(review): this starts from Election_data rather than Election_data1,
# so the columns dropped in the previous step are still present here --
# confirm that this is intended.
Election_data2 <- Election_data %>%
  mutate(
    Party_new = as.factor(party_raw),
    Gender_new = as.factor(gender),
    Elect_new = as.factor(elected),
    Incumbent_new = as.factor(incumbent),
    Country_birth_new = as.factor(country_birth),
    lgbtq2_new = as.factor(lgbtq2_out),
    indigenous_new = as.factor(indigenousorigins),
    occupation_new = as.factor(occupation),
    Lawyer_new = as.factor(lawyer),
    census_new = as.factor(censuscategory),
    riding_new = as.factor(riding),
    province_new = as.factor(province)
  )
# Factor columns are summarised as level counts instead of length/class.
summary(Election_data2)
##        id          parliament         year       type_elxn        
##  Min.   :    1   Min.   : 1.00   Min.   :1867   Length:46526      
##  1st Qu.: 6478   1st Qu.:21.00   1st Qu.:1949   Class :character  
##  Median :22031   Median :32.00   Median :1980   Mode  :character  
##  Mean   :18474   Mean   :28.64   Mean   :1970                     
##  3rd Qu.:28613   3rd Qu.:38.00   3rd Qu.:2004                     
##  Max.   :36769   Max.   :44.00   Max.   :2021                     
##                                                                   
##    elected          candidate_name         edate             incumbent        
##  Length:46526       Length:46526       Min.   :1867-08-07   Length:46526      
##  Class :character   Class :character   1st Qu.:1949-06-27   Class :character  
##  Mode  :character   Mode  :character   Median :1980-02-18   Mode  :character  
##                                        Mean   :1970-11-14                     
##                                        3rd Qu.:2004-06-28                     
##                                        Max.   :2021-09-20                     
##                                                                               
##     gender            birth_year    country_birth       lgbtq2_out       
##  Length:46526       Min.   :1798    Length:46526       Length:46526      
##  Class :character   1st Qu.:1864    Class :character   Class :character  
##  Mode  :character   Median :1908    Mode  :character   Mode  :character  
##                     Mean   :1902                                         
##                     3rd Qu.:1942                                         
##                     Max.   :1998                                         
##                     NA's   :34250                                        
##  indigenousorigins   occupation           lawyer          censuscategory    
##  Length:46526       Length:46526       Length:46526       Length:46526      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    riding_id        riding            province             votes      
##  Min.   : 6001   Length:46526       Length:46526       Min.   :    0  
##  1st Qu.:24048   Class :character   Class :character   1st Qu.: 1094  
##  Median :35052   Mode  :character   Mode  :character   Median : 4058  
##  Mean   :35464                                         Mean   : 6975  
##  3rd Qu.:47006                                         3rd Qu.:10371  
##  Max.   :62001                                         Max.   :71535  
##  NA's   :35487                                         NA's   :691    
##  percent_votes      acclaimed           switcher         multiple_candidacy
##  Min.   :  0.000   Length:46526       Length:46526       Length:46526      
##  1st Qu.:  4.268   Class :character   Class :character   Class :character  
##  Median : 23.124   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 26.351                                                           
##  3rd Qu.: 44.760                                                           
##  Max.   :100.000                                                           
##  NA's   :73                                                                
##   party_raw         party_minor_group  party_major_group  gov_party_raw     
##  Length:46526       Length:46526       Length:46526       Length:46526      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  gov_minor_group    gov_major_group    num_candidates  
##  Length:46526       Length:46526       Min.   : 1.000  
##  Class :character   Class :character   1st Qu.: 3.000  
##  Mode  :character   Mode  :character   Median : 5.000  
##                                        Mean   : 4.771  
##                                        3rd Qu.: 6.000  
##                                        Max.   :21.000  
##                                                        
##                         Party_new     Gender_new         Elect_new    
##  Liberal                     :10457   2   :    2   Elected    :12386  
##  New Democratic Party        : 5918   F   : 6585   Not elected:34140  
##  Progressive Conservative    : 5015   M   :39938                      
##  Conservative                : 3373   NA's:    1                      
##  Green Party of Canada       : 2606                                   
##  Conservative Party of Canada: 2281                                   
##  (Other)                     :16876                                   
##        Incumbent_new       Country_birth_new   lgbtq2_new       indigenous_new 
##  Incumbent    : 9337   Canada       :  462   Not Out: 2003   Indigenous:  236  
##  Not incumbent:37121   India        :   10   Out    :  134   Other     :46290  
##  NA's         :   68   Hong Kong    :    8   NA's   :44389                     
##                        United States:    8                                     
##                        Lebanon      :    5                                     
##                        (Other)      :   50                                     
##                        NA's         :45983                                     
##          occupation_new   Lawyer_new   
##  lawyer         : 3940   Lawyer: 5807  
##  farmer         : 2624   Other :36328  
##  teacher        : 1536   NA's  : 4391  
##  parliamentarian: 1240                 
##  merchant       : 1195                 
##  (Other)        :31870                 
##  NA's           : 4121                 
##                                                                        census_new   
##  Occupations in education, law and social, community and government services:13060  
##  Business, finance and administration occupations                           : 6233  
##  Sales and service occupations                                              : 4288  
##  Natural resources, agriculture and related production occupations          : 3511  
##  Members of Parliament                                                      : 2404  
##  (Other)                                                                    :11434  
##  NA's                                                                       : 5596  
##             riding_new              province_new  
##  VICTORIA        :  307   Ontario         :15965  
##  HALIFAX         :  228   Quebec          :13026  
##  VANCOUVER CENTRE:  186   British Columbia: 4606  
##  YORK WEST       :  164   Alberta         : 3401  
##  HOCHELAGA       :  155   Manitoba        : 2279  
##  VANCOUVER EAST  :  152   Saskatchewan    : 2106  
##  (Other)         :45334   (Other)         : 5143

Filter more

# Keep only the eight columns used by the analysis below.
Election_data3 <- select(
  Election_data2,
  year, elected, gender, riding, province, votes, percent_votes, party_raw
)

Only elected

# Restrict the data to winning candidates only.
Election_data4 <- filter(Election_data3, elected == "Elected")
summary(Election_data4)
##       year        elected             gender             riding         
##  Min.   :1867   Length:12386       Length:12386       Length:12386      
##  1st Qu.:1917   Class :character   Class :character   Class :character  
##  Median :1958   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :1952                                                           
##  3rd Qu.:1993                                                           
##  Max.   :2021                                                           
##                                                                         
##    province             votes       percent_votes     party_raw        
##  Length:12386       Min.   :   43   Min.   : 20.18   Length:12386      
##  Class :character   1st Qu.: 5081   1st Qu.: 45.70   Class :character  
##  Mode  :character   Median :12442   Median : 52.06   Mode  :character  
##                     Mean   :13808   Mean   : 54.44                     
##                     3rd Qu.:20994   3rd Qu.: 58.86                     
##                     Max.   :71535   Max.   :100.00                     
##                     NA's   :652     NA's   :34
# Count elected candidates per party, province and election year, with the
# largest counts first.
Election_data5 <- Election_data4 %>%
  group_by(party_raw, province, year) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
## `summarise()` has grouped output by 'party_raw', 'province'. You can override
## using the `.groups` argument.
# Check the distribution of the per-party/province/year counts.
summary(Election_data5)
##   party_raw           province              year          count        
##  Length:1670        Length:1670        Min.   :1867   Min.   :  1.000  
##  Class :character   Class :character   1st Qu.:1903   1st Qu.:  1.000  
##  Mode  :character   Mode  :character   Median :1945   Median :  2.000  
##                                        Mean   :1944   Mean   :  7.417  
##                                        3rd Qu.:1980   3rd Qu.:  7.000  
##                                        Max.   :2021   Max.   :101.000

Bar chart of elected candidates by party from 1867-2021 shows that Liberal candidates are dominant

# Bar chart with one bar per party, counting the elected candidates.
# The x-axis labels are rotated 90 degrees.
histo <- ggplot(Election_data4, aes(x = party_raw)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 90))

Create a circular barplot visualization for elected MPs in 2021

----- This section prepares a data frame for the labels -----

Get the name and the y position of each label

# Keep only the 2021 rows (one row per party/province combination).
Election_data6 <- Election_data5 %>%
  filter(year == 2021)
# Number the bars 1..n from the actual row count instead of a hard-coded
# 1:30, which breaks as soon as the number of 2021 rows differs from 30.
# Both columns are kept so any code relying on either name still works.
Election_data6$consecutive_numbers <- seq_len(nrow(Election_data6))
Election_data6$id <- Election_data6$consecutive_numbers
# The label data starts as a copy of the bar data; label columns are added
# to it in the following steps.
label_data <- Election_data6

calculate the ANGLE of the labels

# Each bar occupies 360 / number_of_bar degrees of the circle. A label's
# angle is measured at the centre of its bar, offset by 90 degrees.
number_of_bar <- nrow(label_data)
angle <- 90 - (360 * (label_data$id - 0.5)) / number_of_bar

I subtract 0.5 because each label must have the angle of the center of its bar, not of its extreme right (1) or extreme left (0).

calculate the alignment of labels: right or left

Labels on the left part of the plot currently have an angle < -90

# Labels with an angle below -90 get hjust = 1, all others hjust = 0.
label_data$hjust <- ifelse(
  test = angle < -90,
  yes = 1,
  no = 0
)

Flip the angle by 180 degrees to make the labels readable

# Labels with an angle below -90 are rotated by a further 180 degrees;
# the rest keep their computed angle.
label_data$angle <- ifelse(
  test = angle < -90,
  yes = angle + 180,
  no = angle
)

Combine party and province into a single label

# Label text: party name and province separated by a single space.
label_data[["combined"]] <- paste(
  label_data[["party_raw"]],
  label_data[["province"]],
  sep = " "
)

Make the plot

# Circular bar plot: one bar per row of label_data, with bar height and
# opacity both driven by `count`.
p <- ggplot(
  data = label_data,
  mapping = aes(x = as.factor(id), y = count, alpha = count)
) +
  geom_col(fill = "Green") +
  # The y range starts below zero; the bars themselves start at 0.
  ylim(-100, 200) +
  theme_minimal() +
  theme(
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    # Adjust the margin to make sure labels are not truncated.
    plot.margin = unit(rep(-1, 4), "cm")
  ) +
  coord_polar(start = 0) +
  # Labels sit 10 units above the top of each bar and use the per-row angle
  # and horizontal justification prepared in label_data.
  geom_text(
    data = label_data,
    mapping = aes(x = id, y = count + 10, label = combined, hjust = hjust),
    color = "black",
    fontface = "bold",
    alpha = 0.6,
    size = 2.5,
    angle = label_data$angle,
    inherit.aes = FALSE
  )